/***************************************************************************
 *   Copyright (C) 2000-2004 by                                            *
 *   Jean-Christophe Hoelt <jeko@ios-software.com>                         *
 *   Guillaume Borios <gyom@ios-software.com>                              *
 *                                                                          *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.         *
 ***************************************************************************/

//#include "config.h"

#ifdef CPU_X86

#define BUFFPOINTNB 16
#define BUFFPOINTMASK 0xffff
#define BUFFINCR 0xff

#include <stddef.h>
#include "mmx.h"
#include "goom_graphic.h"

#define sqrtperte 16
// faire : a % sqrtperte <=> a & pertemask
#define PERTEMASK 0xf
// faire : a / sqrtperte <=> a >> PERTEDEC
#define PERTEDEC 4

void zoom_filter_mmx (int prevX, int prevY,
              Pixel *expix1, Pixel *expix2,
              int *brutS, int *brutD, int buffratio,
              int precalCoef[16][16])
{
    unsigned int ax = (prevX-1)<<PERTEDEC, ay = (prevY-1)<<PERTEDEC;
        size_t sizeX = prevX;

    int bufsize = prevX * prevY;
    int loop;

    __asm__ __volatile__ ("pxor %mm7,%mm7");

    for (loop=0; loop<bufsize; loop++)
    {
        /*      int couleur; */
        unsigned int px,py;
        size_t pos;
        int coeffs;

        int myPos = loop << 1,
        myPos2 = myPos + 1;
        int brutSmypos = brutS[myPos];

        px = brutSmypos + (((brutD[myPos] - brutSmypos)*buffratio) >> BUFFPOINTNB);
        brutSmypos = brutS[myPos2];
        py = brutSmypos + (((brutD[myPos2] - brutSmypos)*buffratio) >> BUFFPOINTNB);

        if ((py>=ay) || (px>=ax)) {
            pos=coeffs=0;
        }
        else {
            pos = ((px >> PERTEDEC) + prevX * (py >> PERTEDEC));
            // coef en modulo 15
            coeffs = precalCoef [px & PERTEMASK][py & PERTEMASK];
        }

        __asm__ __volatile__ (
        "movd %2, %%mm6 \n\t"

        /* recuperation des deux premiers pixels dans mm0 et mm1 */
        "movq (%3,%1,4), %%mm0 \n\t"	/* b1-v1-r1-a1-b2-v2-r2-a2 */
        "movq %%mm0, %%mm1 \n\t"		/* b1-v1-r1-a1-b2-v2-r2-a2 */

        /* depackage du premier pixel */
        "punpcklbw %%mm7, %%mm0 \n\t"	/* 00-b2-00-v2-00-r2-00-a2 */

        "movq %%mm6, %%mm5 \n\t"		/* ??-??-??-??-c4-c3-c2-c1 */
        /* depackage du 2ieme pixel */
        "punpckhbw %%mm7, %%mm1 \n\t"	/* 00-b1-00-v1-00-r1-00-a1 */

        /* extraction des coefficients... */
        "punpcklbw %%mm5, %%mm6 \n\t"	/* c4-c4-c3-c3-c2-c2-c1-c1 */
        "movq %%mm6, %%mm4 \n\t"		/* c4-c4-c3-c3-c2-c2-c1-c1 */
        "movq %%mm6, %%mm5 \n\t"		/* c4-c4-c3-c3-c2-c2-c1-c1 */

        "punpcklbw %%mm5, %%mm6 \n\t"	/* c2-c2-c2-c2-c1-c1-c1-c1 */
        "punpckhbw %%mm5, %%mm4 \n\t"	/* c4-c4-c4-c4-c3-c3-c3-c3 */

        "movq %%mm6, %%mm3 \n\t"		/* c2-c2-c2-c2-c1-c1-c1-c1 */

        "punpcklbw %%mm7, %%mm6 \n\t"	/* 00-c1-00-c1-00-c1-00-c1 */
        "punpckhbw %%mm7, %%mm3 \n\t"	/* 00-c2-00-c2-00-c2-00-c2 */

        /* multiplication des pixels par les coefficients */
        "pmullw %%mm6, %%mm0 \n\t"		/* c1*b2-c1*v2-c1*r2-c1*a2 */
        "pmullw %%mm3, %%mm1 \n\t"		/* c2*b1-c2*v1-c2*r1-c2*a1 */
        "paddw %%mm1, %%mm0 \n\t"

        /* ...extraction des 2 derniers coefficients */
        "movq %%mm4, %%mm5 \n\t"		/* c4-c4-c4-c4-c3-c3-c3-c3 */
        "punpcklbw %%mm7, %%mm4 \n\t"	/* 00-c3-00-c3-00-c3-00-c3 */
        "punpckhbw %%mm7, %%mm5 \n\t"	/* 00-c4-00-c4-00-c4-00-c4 */

        /* ajouter la longueur de ligne a esi */
        "add %4,%1 \n\t"

        /* recuperation des 2 derniers pixels */
        "movq (%3,%1,4), %%mm1 \n\t"
        "movq %%mm1, %%mm2 \n\t"

        /* depackage des pixels */
        "punpcklbw %%mm7, %%mm1 \n\t"
        "punpckhbw %%mm7, %%mm2 \n\t"

        /* multiplication pas les coeffs */
        "pmullw %%mm4, %%mm1 \n\t"
        "pmullw %%mm5, %%mm2 \n\t"

        /* ajout des valeurs obtenues à la valeur finale */
        "paddw %%mm1, %%mm0 \n\t"
        "paddw %%mm2, %%mm0 \n\t"

        /* division par 256 = 16+16+16+16, puis repackage du pixel final */
        "psrlw $8, %%mm0 \n\t"
        "packuswb %%mm7, %%mm0 \n\t"

        "movd %%mm0,%0 \n\t"
          :"=g"(expix2[loop]),"+r"(pos)
          :"r"(coeffs),"r"(expix1),"g"(sizeX)

        );

        emms();
    }
}

#define DRAWMETHOD_PLUS_MMX(_out,_backbuf,_col) \
{ \
    movd_m2r(_backbuf, mm0); \
    paddusb_m2r(_col, mm0); \
    movd_r2m(mm0, _out); \
}

#define DRAWMETHOD DRAWMETHOD_PLUS_MMX(*p,*p,col)

void draw_line_mmx (Pixel *data, int x1, int y1, int x2, int y2, int col, int screenx, int screeny)
{
    int x, y, dx, dy, yy, xx;
    Pixel *p;

    if ((y1 < 0) || (y2 < 0) || (x1 < 0) || (x2 < 0) || (y1 >= screeny) || (y2 >= screeny) || (x1 >= screenx) || (x2 >= screenx))
        goto end_of_line;

    dx = x2 - x1;
    dy = y2 - y1;
    if (x1 >= x2) {
        int tmp;

        tmp = x1;
        x1 = x2;
        x2 = tmp;
        tmp = y1;
        y1 = y2;
        y2 = tmp;
        dx = x2 - x1;
        dy = y2 - y1;
    }

    /* vertical line */
    if (dx == 0) {
        if (y1 < y2) {
            p = &(data[(screenx * y1) + x1]);
            for (y = y1; y <= y2; y++) {
                DRAWMETHOD;
                p += screenx;
            }
        }
        else {
            p = &(data[(screenx * y2) + x1]);
            for (y = y2; y <= y1; y++) {
                DRAWMETHOD;
                p += screenx;
            }
        }
        goto end_of_line;
    }
    /* horizontal line */
    if (dy == 0) {
        if (x1 < x2) {
            p = &(data[(screenx * y1) + x1]);
            for (x = x1; x <= x2; x++) {
                DRAWMETHOD;
                p++;
            }
            goto end_of_line;
        }
        else {
            p = &(data[(screenx * y1) + x2]);
            for (x = x2; x <= x1; x++) {
                DRAWMETHOD;
                p++;
            }
            goto end_of_line;
        }
    }
    /* 1    */
    /*  \   */
    /*   \  */
    /*    2 */
    if (y2 > y1) {
        /* steep */
        if (dy > dx) {
            dx = ((dx << 16) / dy);
            x = x1 << 16;
            for (y = y1; y <= y2; y++) {
                xx = x >> 16;
                p = &(data[(screenx * y) + xx]);
                DRAWMETHOD;
                if (xx < (screenx - 1)) {
                    p++;
                    /* DRAWMETHOD; */
                }
                x += dx;
            }
            goto end_of_line;
        }
        /* shallow */
        else {
            dy = ((dy << 16) / dx);
            y = y1 << 16;
            for (x = x1; x <= x2; x++) {
                yy = y >> 16;
                p = &(data[(screenx * yy) + x]);
                DRAWMETHOD;
                if (yy < (screeny - 1)) {
                    p += screeny;
                    /* DRAWMETHOD; */
                }
                y += dy;
            }
        }
    }
    /*    2 */
    /*   /  */
    /*  /   */
    /* 1    */
    else {
        /* steep */
        if (-dy > dx) {
            dx = ((dx << 16) / -dy);
            x = (x1 + 1) << 16;
            for (y = y1; y >= y2; y--) {
                xx = x >> 16;
                p = &(data[(screenx * y) + xx]);
                DRAWMETHOD;
                if (xx < (screenx - 1)) {
                    p--;
                    /* DRAWMETHOD; */
                }
                x += dx;
            }
            goto end_of_line;
        }
        /* shallow */
        else {
            dy = ((dy << 16) / dx);
            y = y1 << 16;
            for (x = x1; x <= x2; x++) {
                yy = y >> 16;
                p = &(data[(screenx * yy) + x]);
                DRAWMETHOD;
                if (yy < (screeny - 1)) {
                    p += screeny;
                    /* DRAWMETHOD; */
                }
                y += dy;
            }
            goto end_of_line;
        }
    }
end_of_line:
    emms();
    /* __asm__ __volatile__ ("emms"); */
}

#endif
